library(bigMap)
library(bigmemory)
# source scrìpts to compute/plot the co-occurrence matrix
source('../scripts/heatPalette.R')
source('../scripts/cooMtx.R')
Data
data <- read.table('../Data/data36.csv', sep = ';', header = T)
species <- data$X
regions <- colnames(data[, 2:37])
# transpose
data <- t(as.matrix(data[, 2:37]))
colnames(data) <- species
# input data big.matrix (a requisite to compute quality)
Dbm <- as.big.matrix(data, type = 'double')
# init model
m <- bdm.init('D36', data)
Explore a range of perplexities
# range of perplexities
ptsne.ppx <- seq(12, 28, by = 2)
pakde.ppx <- round(seq(4, 7, length.out = length(ptsne.ppx)), 0)
# runs
m.list1 <- lapply(seq_along(ptsne.ppx), function(p) {
m.ppx <- bdm.ptsne(m, threads = 4, layers = 4, rounds = 9, whiten = 0, theta = .0, ppx = ptsne.ppx[p], info = 0)
m.ppx <- bdm.pakde(m.ppx, threads = 2, ppx = pakde.ppx[p])
m.ppx <- bdm.wtt(m.ppx)
m.ppx <- bdm.qlty(m.ppx, inp.data = Dbm, threads = 4, layers = 4, rounds = 2, ret.qlty = T, qm = 'kn', verbose = F)
m.ppx
})
save(m.list1, file = './mlist1.RData')
# to run from scratch evaluate manually the previous chunk
load('./mlist1.RData')
Kary-neighborhood preservation
bdm.qlty.plot(m.list1, ppxfrmt = 0)

ptSNE.ppx = 20, pakde.ppx = 4
Assess clustering stability
ptsne.ppx <- 20
pakde.ppx <- 4
m.list2 <- lapply(1:50, function(i) {
m.ppx <- bdm.ptsne(m, threads = 10, layers = 9, rounds = 10, whiten = 0, theta = .0, ppx = ptsne.ppx, info = 0)
m.ppx <- bdm.pakde(m.ppx, threads = 2, ppx = pakde.ppx)
m.ppx <- bdm.wtt(m.ppx)
m.ppx
})
save(m.list2, file = './mlist2.RData')
# to run from scratch evaluate manually the previous chunk
load('./mlist2.RData')
Clustering co-ocurrence matrix
cooMtx.plot(m.list2, nrow(data), regions)

ptSNE.ppx = 22, pakde.ppx = 7
Assess clustering stability
ptsne.ppx <- 22
pakde.ppx <- 7
m.list2 <- lapply(1:50, function(i) {
m.ppx <- bdm.ptsne(m, threads = 10, layers = 9, rounds = 10, whiten = 0, theta = .0, ppx = ptsne.ppx, info = 0)
m.ppx <- bdm.pakde(m.ppx, threads = 2, ppx = pakde.ppx)
m.ppx <- bdm.wtt(m.ppx)
m.ppx
})
save(m.list3, file = './mlist2.RData')
# to run from scratch evaluate manually the previous chunk
load('./mlist3.RData')
Clustering co-ocurrence matrix
cooMtx.plot(m.list3, nrow(data), regions)

Comparing Clustering membership (top-1 model)
# choose top-1 model
L.ppx20 <- bdm.labels(m.list2[[top3.ppx20[1]]])
L.ppx22 <- bdm.labels(m.list3[[top3.ppx22[1]]])
ix <- sort(L.ppx22, index.return = T)$ix
cbind(regions, L.ppx22, L.ppx20)[ix, ]
## regions L.ppx22 L.ppx20
## [1,] "Prefrontal_White" "1" "6"
## [2,] "Prefrontal_Gray" "1" "6"
## [3,] "Frontal_Motor_White" "1" "4"
## [4,] "FrontalMotorGray" "1" "2"
## [5,] "ParTemp_White" "1" "2"
## [6,] "ParTemp_Gray" "1" "4"
## [7,] "Lateral_Cerebellum" "1" "6"
## [8,] "Cerebellum_NucleusDentate" "1" "3"
## [9,] "Pons_ventral" "1" "2"
## [10,] "Striatum" "1" "2"
## [11,] "Inf_Olive_Primary" "1" "4"
## [12,] "Amyg_cortico_baso_lateral" "1" "2"
## [13,] "Amyg_centro_medio_anterior" "1" "2"
## [14,] "Diencephalon" "1" "2"
## [15,] "Mesencephalon" "1" "2"
## [16,] "Hypoglossal" "1" "2"
## [17,] "Piriformcortex" "1" "2"
## [18,] "Paleocortex" "1" "2"
## [19,] "Subiculum" "2" "1"
## [20,] "Schizocortex" "2" "1"
## [21,] "CA1" "2" "1"
## [22,] "CA2" "2" "1"
## [23,] "CA3" "2" "1"
## [24,] "Hilus" "2" "1"
## [25,] "FasciaDentata" "2" "1"
## [26,] "Inf_Olive_Med_Acc" "3" "5"
## [27,] "Inf_Olive_Dorsal_Acc" "3" "5"
## [28,] "Medial_Cerebellum" "3" "5"
## [29,] "Cerebellum_Nucleus_Fastigial" "3" "5"
## [30,] "Cerebellum_Nucleus_Interposed" "3" "5"
## [31,] "Medulla_Nucleus_Superior" "3" "5"
## [32,] "Medulla_Nucleus_Lateral" "3" "5"
## [33,] "Medulla_Nucleus_Medial" "3" "5"
## [34,] "Medulla_NucleusDescendens" "3" "5"
## [35,] "Trigeminal" "3" "5"
## [36,] "Facial" "3" "8"
Quantile heat-map. (for top1 model with ppx = 22)
m.top1 <- m.list3[[top3.ppx22[1]]]
bdm.qMap(m.top1, data = data[, 1:9], qMap.cex = 1, qtitle = 'primateBrains_36')

bdm.qMap(m.top1, data = data[, 10:17], qMap.cex = 1, qtitle = 'primateBrains_36')

Comments
This is run with the latest version of the ptSNE, bigMap_3.7.9, featuring some improvements with respect to former versions. In particular, it solves a bug in the computation of kernels’ precision (beta parameter) in bigMap_3.7.2 (used in the previous report), so that it is now as accurate as bigMap_2.x.x but faster.
I compare the option you suggest (ptsne.ppx = 20, pakde.ppx = 4) with (ptsne.ppx = 22, pakde.ppx = 7), as both suggest similar clustering paterns and show a fairly balanced neighborhood preservation. Taking as high as possible parameters is a guarantee of more robustness in the results. As you can see, the co-ocurrence matrix is better and
To asses the robustness of the output I run 50 times the same model and compute a clustering co-ocurrence matrix. For this runs, I’m setting (threads = 10, layers = 9) so that we have a thread.size of 90% of the dataset size. This lets the datapoints move arround more freely in each layer bringing some extra degree of freedom into the global solution.
Results:
using ppx = 20 I do not get the same exact cluster as you get, though with major similarities.
using ppx = 22 we get a very robust cluster 1 integrating what you get as clusters 3 and 5 (plus some other from your cluster 2), we get cluster 2 that exactly matches your cluster 1, and a cluster 3 that almost matches what you report as cluster 4.
Side note: From my experience I know that ridge shaped k-ary curves like the ones we have here (see section k-ary neighborhood preservation) are usually a hint of some fractality or hierarchycal structure in the data, but it seems unlikely to me to have this kind of structure with so few data. Does this make any sense to you?